import  time,pymysql,pandas,re
from typing import Collection
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from datetime import datetime
# MySQL connection used throughout the script to persist crawl results.
# NOTE(review): credentials are hard-coded — consider moving to env/config.
settings = {
    'host': '172.16.10.201',
    'user': 'zy001',
    'port': 3306,
    'password': 'zy@123',
    'database': 'crawl-bk',
    'charset': 'utf8',
}
conn = pymysql.connect(**settings)
cursor = conn.cursor()
# Chrome configuration: disable image loading (content-setting value 2) to
# speed up page fetches, and strip the "enable-automation" switch so the
# browser looks less like an automated client to the site's bot detection.
option = ChromeOptions()
option.add_experimental_option(
    'prefs',
    {'profile.default_content_setting_values': {'images': 2}},
)
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--ignore-certificate-errors')
# option.headless = True  # left disabled: headless mode is easier to detect

        

# First list-page URL per Kunming district (keys are district names).
# Each URL ends with 'z62/'; save_list() relies on that 4-char suffix to
# splice in the page marker 'i3<N>' for pages after the first.
area_first_url = {
    '盘龙':'https://km.zu.fang.com/house-a016831/z62/',
    '五花':'https://km.zu.fang.com/house-a016832/z62/',
    '官渡':'https://km.zu.fang.com/house-a016830/z62/',
    '西山':'https://km.zu.fang.com/house-a016833/z62/',
    '安宁':'https://km.zu.fang.com/house-a01069/z62/',
    '宜良':'https://km.zu.fang.com/house-a011163/z62/',
    '呈贡':'https://km.zu.fang.com/house-a01071/z62/',
}



def save_list(area_first_url, max_pages=100):
    """Crawl paginated rental listings per district and persist them.

    For every district in *area_first_url*, visits up to *max_pages* list
    pages, extracts each listing's title and detail link, bulk-inserts
    them into ``fangtianxia_rent_all_total``, and records a progress row
    in ``crawl_log`` after each page.

    Relies on module-level globals: ``driver`` (selenium webdriver),
    ``redict_url`` (anti-bot redirect URL prefix), ``cursor``/``conn``
    (pymysql handles).

    Args:
        area_first_url: mapping of district name -> first list-page URL;
            each URL must end with the 4-char suffix 'z62/'.
        max_pages: maximum paginated pages to visit per district
            (default 100, matching the original hard-coded range).
    """
    for region, first_url in area_first_url.items():
        for page_no in range(1, max_pages + 1):
            page = str(page_no)
            # Page 1 is the plain district URL; later pages splice the
            # 'i3<N>' marker before the 'z62/' suffix, e.g.
            # .../house-a016831/i33-z62/.
            if page_no == 1:
                url = first_url
            else:
                url = first_url[:-4] + 'i3' + page + '-z62/'
            driver.get(url)
            # Anti-bot check: the site redirects to a verification page.
            # Wait, then retry the *same* page — the original retried the
            # first page, which lost the pagination position for pages > 1.
            while redict_url in driver.current_url:
                time.sleep(10)
                driver.get(url)
            listings = driver.find_elements_by_xpath("//div[@class='houseList']/dl/dd")
            rows = []
            for item in listings:
                # Locate the anchor once; the original looked it up twice.
                link = item.find_element_by_xpath("./p/a")
                rows.append((link.text, '住宅', url,
                             link.get_attribute('href'), region, datetime.now()))
            sql = "insert into fangtianxia_rent_all_total(title,types,list_url,url,region,create_time) values(%s,%s,%s,%s,%s,%s)"
            cursor.executemany(sql, rows)
            conn.commit()
            # Progress log so an interrupted crawl can be resumed by hand.
            log_sql = "insert into crawl_log(tables,url,described,create_time) values(%s,%s,%s,%s)"
            described = '完成第' + page + '页列表数据的抓取'
            cursor.execute(log_sql,
                           ('fangtianxia_rent_all_total', url, described, datetime.now()))
            conn.commit()




if __name__ == '__main__':
    # Anti-bot verification URL prefix; save_list() reads this module-level
    # name to detect when a fetch was redirected to the verification page.
    redict_url = 'http://search.fang.com/'
    # Module-level webdriver shared implicitly (as a global) with save_list().
    driver = webdriver.Chrome(executable_path='chromedriver', options=option)
    save_list(area_first_url)
   
       
# def test():
    # driver = webdriver.Chrome(executable_path='chromedriver', options=option)
    # u = 'https://km.zu.fang.com/house-a01071/z62/'
    # u2 = 'https://km.zu.fang.com/'
    # driver.get(u)
    # driver.get(u2)
    # driver.close


    # # driver.close()
    # url2 = 'https://km.zu.fang.com/house-a016831/i33-z62/'
    # driver.get(u)
    # sql = 'select url from fangtianxia_rent_all_total'
    # cursor.execute(sql)
    # data = cursor.fetchall()
    # for i in data:
    #     url = i[0]
    #     driver.get(url)
    #     time.sleep(10)

    





# Release DB handles at script end.  Close the cursor BEFORE the connection —
# the original closed the connection first, leaving the cursor to be closed
# against an already-dead connection.
cursor.close()
conn.close()
